library(Seurat)
## Loading required package: SeuratObject
## Loading required package: sp
## 'SeuratObject' was built under R 4.4.0 but the current version is
## 4.4.1; it is recomended that you reinstall 'SeuratObject' as the ABI
## for R may have changed
## 'SeuratObject' was built with package 'Matrix' 1.7.0 but the current
## version is 1.7.1; it is recomended that you reinstall 'SeuratObject' as
## the ABI for 'Matrix' may have changed
##
## Attaching package: 'SeuratObject'
## The following objects are masked from 'package:base':
##
## intersect, t
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.5
## ✔ ggplot2 3.5.1 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.4 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(scDblFinder)
## Loading required package: SingleCellExperiment
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'matrixStats'
##
## The following object is masked from 'package:dplyr':
##
## count
##
##
## Attaching package: 'MatrixGenerics'
##
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
##
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
##
## The following objects are masked from 'package:lubridate':
##
## intersect, setdiff, union
##
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
##
## The following object is masked from 'package:SeuratObject':
##
## intersect
##
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
##
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, table,
## tapply, union, unique, unsplit, which.max, which.min
##
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
##
## The following objects are masked from 'package:lubridate':
##
## second, second<-
##
## The following object is masked from 'package:tidyr':
##
## expand
##
## The following objects are masked from 'package:dplyr':
##
## first, rename
##
## The following object is masked from 'package:utils':
##
## findMatches
##
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
##
## Loading required package: IRanges
##
## Attaching package: 'IRanges'
##
## The following object is masked from 'package:lubridate':
##
## %within%
##
## The following object is masked from 'package:purrr':
##
## reduce
##
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
##
## The following object is masked from 'package:sp':
##
## %over%
##
## Loading required package: GenomeInfoDb
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
##
## Attaching package: 'Biobase'
##
## The following object is masked from 'package:MatrixGenerics':
##
## rowMedians
##
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
##
##
## Attaching package: 'SummarizedExperiment'
##
## The following object is masked from 'package:Seurat':
##
## Assays
##
## The following object is masked from 'package:SeuratObject':
##
## Assays
set.seed(123)
Publication: Integrative analysis of spatial and single-cell transcriptome data from human pancreatic cancer reveals an intermediate cancer cell population associated with poor prognosis
(https://pmc.ncbi.nlm.nih.gov/articles/PMC10832111/)
dirs <- list.dirs(path = './input/', recursive = F, full.names = F)
for (x in dirs){
cts <- ReadMtx(mtx= paste0('./input/', x, '/matrix.mtx.gz'),
features = paste0('./input/', x, '/features.tsv.gz'),
cells=paste0('./input/', x, '/barcodes.tsv.gz'))
assign(x, CreateSeuratObject(counts = cts, min.cells = 3, project = x))
}
# merge object
seurat_object <- merge(GSM5831620_5_GEX_4, y=c(GSM5831621_5_GEX_5,GSM5831622_5_GEX_6, GSM5831623_5_GEX_9,GSM5831624_GEX_45_MM),
add.cell.ids=c(ls()[3:7]))
# create columns for metadata
# identity
unique(seurat_object@meta.data$orig.ident) # control identity
[1] “GSM5831620_5_GEX_4” “GSM5831621_5_GEX_5”
“GSM5831622_5_GEX_6”
[4] “GSM5831623_5_GEX_9” “GSM5831624_GEX_45_MM”
# cell id
seurat_object@meta.data$cell_id <- rownames(seurat_object@meta.data)
# percent mt
seurat_object[["percent.mt"]]<-PercentageFeatureSet(seurat_object, pattern="^MT-")
plot1 <- FeatureScatter(seurat_object, feature1 = "nCount_RNA", feature2 = "percent.mt")
plot2 <- FeatureScatter(seurat_object, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
plot1 + plot2
# parameters from the publication
seurat_object <- subset(seurat_object, subset = percent.mt < 10 & nCount_RNA > 2000 & nFeature_RNA > 500 & nFeature_RNA < 7000)
head(seurat_object)
| orig.ident | nCount_RNA | nFeature_RNA | cell_id | percent.mt | |
|---|---|---|---|---|---|
| GSM5831620_5_GEX_4_AAACCTGAGCAACGGT-1 | GSM5831620_5_GEX_4 | 15275 | 3787 | GSM5831620_5_GEX_4_AAACCTGAGCAACGGT-1 | 1.8788871 |
| GSM5831620_5_GEX_4_AAACCTGAGCACCGTC-1 | GSM5831620_5_GEX_4 | 2462 | 1522 | GSM5831620_5_GEX_4_AAACCTGAGCACCGTC-1 | 3.8180341 |
| GSM5831620_5_GEX_4_AAACCTGAGTCAAGGC-1 | GSM5831620_5_GEX_4 | 2170 | 985 | GSM5831620_5_GEX_4_AAACCTGAGTCAAGGC-1 | 1.7972350 |
| GSM5831620_5_GEX_4_AAACCTGAGTCTCGGC-1 | GSM5831620_5_GEX_4 | 2757 | 1239 | GSM5831620_5_GEX_4_AAACCTGAGTCTCGGC-1 | 7.1091766 |
| GSM5831620_5_GEX_4_AAACCTGCAAAGAATC-1 | GSM5831620_5_GEX_4 | 17419 | 4277 | GSM5831620_5_GEX_4_AAACCTGCAAAGAATC-1 | 0.7922384 |
| GSM5831620_5_GEX_4_AAACCTGCAAGAGTCG-1 | GSM5831620_5_GEX_4 | 11342 | 3304 | GSM5831620_5_GEX_4_AAACCTGCAAGAGTCG-1 | 0.8993123 |
| GSM5831620_5_GEX_4_AAACCTGCAATCTGCA-1 | GSM5831620_5_GEX_4 | 9774 | 2783 | GSM5831620_5_GEX_4_AAACCTGCAATCTGCA-1 | 2.0360139 |
| GSM5831620_5_GEX_4_AAACCTGCAATTCCTT-1 | GSM5831620_5_GEX_4 | 21284 | 4344 | GSM5831620_5_GEX_4_AAACCTGCAATTCCTT-1 | 0.0845706 |
| GSM5831620_5_GEX_4_AAACCTGCAGTATAAG-1 | GSM5831620_5_GEX_4 | 26405 | 4383 | GSM5831620_5_GEX_4_AAACCTGCAGTATAAG-1 | 2.7797766 |
| GSM5831620_5_GEX_4_AAACCTGCATAGACTC-1 | GSM5831620_5_GEX_4 | 4595 | 1577 | GSM5831620_5_GEX_4_AAACCTGCATAGACTC-1 | 9.4885745 |
seurat_object
## An object of class Seurat
## 24192 features across 32681 samples within 1 assay
## Active assay: RNA (24192 features, 0 variable features)
## 5 layers present: counts.GSM5831620_5_GEX_4, counts.GSM5831621_5_GEX_5, counts.GSM5831622_5_GEX_6, counts.GSM5831623_5_GEX_9, counts.GSM5831624_GEX_45_MM
plot1 <- FeatureScatter(seurat_object, feature1 = "nCount_RNA", feature2 = "percent.mt")
plot2 <- FeatureScatter(seurat_object, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
plot1 + plot2
# double check filtering
min(seurat_object@meta.data$nFeature_RNA)
## [1] 505
max(seurat_object@meta.data$nFeature_RNA)
## [1] 6991
min(seurat_object@meta.data$nCount_RNA)
## [1] 2002
max(seurat_object@meta.data$percent.mt)
## [1] 9.997791
for (i in c("GSM5831620_5_GEX_4","GSM5831621_5_GEX_5","GSM5831622_5_GEX_6","GSM5831623_5_GEX_9","GSM5831624_GEX_45_MM")){
sub <- subset(seurat_object, subset = orig.ident == i)
eval(parse(text=paste("sceDblF_",i," <- scDblFinder(GetAssayData(sub, layer = 'counts'), dbr = 0.07)",sep="")))
eval(parse(text=paste("score.",i," <- sceDblF_",i,"@colData@listData[['scDblFinder.score']]",sep="")))
eval(parse(text=paste("names(score.",i,") <- rownames(sceDblF_",i,"@colData)",sep="")))
}
## Assuming the input to be a matrix of counts or expected counts.
## Creating ~7249 artificial doublets...
## Dimensional reduction
## Evaluating kNN...
## Training model...
## iter=0, 1512 cells excluded from training.
## iter=1, 1559 cells excluded from training.
## iter=2, 1572 cells excluded from training.
## Threshold found:0.499
## 995 (11%) doublets called
## Assuming the input to be a matrix of counts or expected counts.
## Creating ~9278 artificial doublets...
## Dimensional reduction
## Evaluating kNN...
## Training model...
## iter=0, 1428 cells excluded from training.
## iter=1, 1499 cells excluded from training.
## iter=2, 1496 cells excluded from training.
## Threshold found:0.409
## 1344 (11.6%) doublets called
## Assuming the input to be a matrix of counts or expected counts.
## Creating ~3198 artificial doublets...
## Dimensional reduction
## Evaluating kNN...
## Training model...
## iter=0, 639 cells excluded from training.
## iter=1, 667 cells excluded from training.
## iter=2, 668 cells excluded from training.
## Threshold found:0.473
## 355 (8.9%) doublets called
## Assuming the input to be a matrix of counts or expected counts.
## Creating ~4848 artificial doublets...
## Dimensional reduction
## Evaluating kNN...
## Training model...
## iter=0, 937 cells excluded from training.
## iter=1, 1005 cells excluded from training.
## iter=2, 1016 cells excluded from training.
## Threshold found:0.539
## 711 (11.7%) doublets called
## Assuming the input to be a matrix of counts or expected counts.
## Creating ~1574 artificial doublets...
## Dimensional reduction
## Evaluating kNN...
## Training model...
## iter=0, 299 cells excluded from training.
## iter=1, 291 cells excluded from training.
## iter=2, 294 cells excluded from training.
## Threshold found:0.454
## 96 (4.9%) doublets called
doublets.info <- rbind(sceDblF_GSM5831620_5_GEX_4@colData,sceDblF_GSM5831621_5_GEX_5@colData,sceDblF_GSM5831622_5_GEX_6@colData,
sceDblF_GSM5831623_5_GEX_9@colData,sceDblF_GSM5831624_GEX_45_MM@colData)
seurat_object$is.doublet <- doublets.info$scDblFinder.class
seurat_object <- subset(seurat_object, subset = is.doublet == 'singlet')
# default parameters
seurat_object<-NormalizeData(seurat_object)
## Normalizing layer: counts.GSM5831620_5_GEX_4
## Normalizing layer: counts.GSM5831621_5_GEX_5
## Normalizing layer: counts.GSM5831622_5_GEX_6
## Normalizing layer: counts.GSM5831623_5_GEX_9
## Normalizing layer: counts.GSM5831624_GEX_45_MM
seurat_object<-FindVariableFeatures(seurat_object, selection.method = "vst", nfeatures=2000)
## Finding variable features for layer counts.GSM5831620_5_GEX_4
## Finding variable features for layer counts.GSM5831621_5_GEX_5
## Finding variable features for layer counts.GSM5831622_5_GEX_6
## Finding variable features for layer counts.GSM5831623_5_GEX_9
## Finding variable features for layer counts.GSM5831624_GEX_45_MM
all.genes<-rownames(seurat_object)
seurat_object<-ScaleData(seurat_object, features=all.genes)
## Centering and scaling data matrix
seurat_object <- RunPCA(seurat_object, features = VariableFeatures(object = seurat_object))
## PC_ 1
## Positive: SPINT2, KRT19, KRT8, CLDN4, EPCAM, KRT18, ELF3, FXYD3, S100P, VAMP8
## SMIM22, TSPAN1, LSR, SLPI, RAB25, OCIAD2, NQO1, CLDN7, PLPP2, AGR2
## SPINT1, EZR, TACSTD2, KRT7, SYNGR2, MLPH, LAMB3, MUC1, TMPRSS4, GPRC5A
## Negative: SPARC, BGN, CALD1, IGFBP7, VIM, DCN, COL1A2, SERPINF1, COL3A1, LUM
## SERPING1, COL1A1, C1S, THY1, MYL9, AEBP1, MXRA8, TAGLN, SERPINH1, COL6A2
## PCOLCE, IGFBP4, ID3, SFRP2, RARRES2, CTSK, MMP2, COL6A3, MGP, FBLN1
## PC_ 2
## Positive: LUM, RARRES2, TPM1, SFRP2, COL1A1, FBLN1, COL1A2, INHBA, THBS2, CTSK
## DCN, ISLR, MXRA8, COL10A1, COL3A1, COL8A1, CTHRC1, COL6A3, CCDC80, AEBP1
## SERPINF1, VCAN, C1S, CDH11, COL5A1, TPM2, SDC1, LMO7, PALLD, MXRA5
## Negative: PLVAP, PECAM1, RAMP2, CLEC14A, CD93, RAMP3, VWF, ESAM, EMCN, CYYR1
## CDH5, AQP1, EGFL7, CLDN5, ADGRL4, CD34, FLT1, CXorf36, IL3RA, TIE1
## S1PR1, CALCRL, FAM167B, MMRN2, SLCO2A1, TMEM255B, PTPRB, RBP7, GIMAP7, ROBO4
## PC_ 3
## Positive: TGM2, CDA, DHRS9, S100A4, FAM83A, LY6D, SNCG, EMP3, FYB1, S100A2
## EREG, MMP28, IGFBP6, FXYD5, SLC16A3, KRT7, RAC2, ANXA1, SYT8, CLIC3
## CSTB, LAPTM5, PPL, WFDC3, TYMP, CST4, TXNIP, SLC2A1, CAV1, WFDC2
## Negative: TESC, FAM3B, CA2, SMIM24, SPINK1, TSPAN8, CLDN18, AKR7A3, LYZ, MUC13
## TM4SF5, LGALS4, VSIG1, CD24, MSMB, IQGAP2, C12orf75, MUC6, PDIA2, REG1A
## TFF1, ANXA10, AQP5, HSD17B2, NR0B2, GMDS, MUC5AC, CLU, FOXA3, ARSE
## PC_ 4
## Positive: NDUFA4L2, RGS5, COX4I2, HIGD1B, EFHD1, NOTCH3, PPP1R14A, PTP4A3, GJA4, ADAP2
## HEYL, MCAM, MYH11, SOD3, ENPEP, ADIRF, MAP3K7CL, CCDC102B, GPX3, FAM162B
## RASL12, CSPG4, CSRP2, PLXDC1, GUCY1B1, SSTR2, TBX2, MAP1B, CDH6, WFDC1
## Negative: COL11A1, COL10A1, CTHRC1, DPYSL3, MMP2, THBS2, COL8A1, SFRP2, VCAN, FNDC1
## ITGA11, HTRA1, SDC1, INHBA, GJA1, HSPG2, LRRC15, MXRA5, PLXDC2, CD55
## GREM1, GJB2, COL12A1, SLC6A6, MFAP5, FBLN2, ANXA2, IGFBP3, MFAP2, C1QTNF3
## PC_ 5
## Positive: PSCA, CP, TXNIP, DHRS9, PRSS22, ZG16B, GSN, LEMD1, LGALS3, CLIC3
## TGM2, SMIM5, WFDC2, SPINT1, CYP3A5, MUC16, TMC5, TNFAIP2, PIGR, SPAG4
## EGLN3, PRSS8, LPCAT4, SLPI, TCIM, ADAM28, RHOV, MUC4, DUOX2, BCAS1
## Negative: UBE2C, MKI67, BIRC5, TPX2, TOP2A, CENPF, NUSAP1, CDC20, CDK1, CCNB2
## PTTG1, CDCA3, HMMR, PLK1, CCNB1, GTSE1, CCNA2, ANLN, ZWINT, ASPM
## MAD2L1, CKAP2L, NUF2, TK1, PKMYT1, TYMS, UBE2T, PBK, TROAP, CDKN3
seurat_object <- RunUMAP(seurat_object, reduction='pca', dims = 1:20)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 09:27:11 UMAP embedding parameters a = 0.9922 b = 1.112
## 09:27:11 Read 29180 rows and found 20 numeric columns
## 09:27:11 Using Annoy for neighbor search, n_neighbors = 30
## 09:27:11 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 09:27:12 Writing NN index file to temp file /var/folders/8b/2_66jk7x0jl1qc4177kp8svm0000gq/T//RtmpjbwV55/filedc9742a7a245
## 09:27:12 Searching Annoy index using 1 thread, search_k = 3000
## 09:27:18 Annoy recall = 100%
## 09:27:20 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 09:27:21 Initializing from normalized Laplacian + noise (using RSpectra)
## 09:27:22 Commencing optimization for 200 epochs, with 1228714 positive edges
## 09:27:30 Optimization finished
seurat_object <- FindNeighbors(seurat_object, reduction = 'pca', dims = 1:50)
## Computing nearest neighbor graph
## Computing SNN
seurat_object <- FindClusters(seurat_object, resolution = c(0.4))
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 29180
## Number of edges: 1084133
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9594
## Number of communities: 29
## Elapsed time: 2 seconds
DimPlot(seurat_object, reduction='pca', label = TRUE)
DimPlot(seurat_object, label = TRUE)
p1 <- DimPlot(seurat_object, reduction = 'umap')
p2 <- DimPlot(seurat_object, reduction = 'umap', group.by = c("orig.ident"))
p1+p2
seurat_object <- IntegrateLayers(object = seurat_object, method = CCAIntegration, assay = "RNA", orig.reduction = "pca", new.reduction = "integrated.cca",
scale.layer = "scale.data",verbose = FALSE)
seurat_object <- RunUMAP(seurat_object, reduction='integrated.cca', dims = 1:50)
## 09:43:30 UMAP embedding parameters a = 0.9922 b = 1.112
## 09:43:30 Read 29180 rows and found 50 numeric columns
## 09:43:30 Using Annoy for neighbor search, n_neighbors = 30
## 09:43:30 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 09:43:31 Writing NN index file to temp file /var/folders/8b/2_66jk7x0jl1qc4177kp8svm0000gq/T//RtmpjbwV55/filedc975eef0c2
## 09:43:32 Searching Annoy index using 1 thread, search_k = 3000
## 09:43:36 Annoy recall = 100%
## 09:43:37 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 09:43:38 Initializing from normalized Laplacian + noise (using RSpectra)
## 09:43:39 Commencing optimization for 200 epochs, with 1307460 positive edges
## 09:43:47 Optimization finished
seurat_object <- FindNeighbors(seurat_object, reduction = 'integrated.cca', dims = 1:50)
## Computing nearest neighbor graph
## Computing SNN
seurat_object <- FindClusters(seurat_object, resolution = 0.3)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 29180
## Number of edges: 1172342
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9403
## Number of communities: 16
## Elapsed time: 4 seconds
DimPlot(seurat_object, label = TRUE)
p1 <- DimPlot(seurat_object, reduction = 'umap', label = T)
p2 <- DimPlot(seurat_object, reduction = 'umap', group.by = c("orig.ident"))
p1+p2
# fibroblast/CAF markers/endothelial cells
VlnPlot(seurat_object, features = toupper(c("Col1a1", "Col1a2", "Col3a1",
"Ngfr", "Pdgfra", "Acta2", "Cd74", "Plvap")))
# tumor markers
VlnPlot(seurat_object, features = toupper(c("Epcam", "Krt8", "Krt7", "Krt19", "Top2a", "Gata6", "Hmga2")))
#acinar/ductal/ADM
VlnPlot(seurat_object, features = toupper(c("Krt19", "Cftr", "Cpb1", "Cpa1", "Cela2a", "Sox9", "Mcam", "Spp1", "Reg3a", "Crp", "Plvap")))
## Warning: Removed 3642 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 3642 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 3642 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 3642 rows containing missing values or values outside the scale range
## (`geom_point()`).
# fibroblasts/ CAF markers/ endothelial cells
FeaturePlot(seurat_object, features = toupper(c("Col1a1", "Col1a2", "Col3a1",
"Ngfr", "Pdgfra", "Acta2", "Cd74", "Plvap")))
#acinar/ductal/ADM
FeaturePlot(seurat_object, features = toupper(c("Krt19", "Cftr", "Cpb1", "Cpa1", "Cela2a", "Sox9", "Mcam", "Spp1", "Reg3a", "Crp", "Plvap")))
# tumor
FeaturePlot(seurat_object, features = toupper(c("Epcam", "Krt8", "Krt7", "Krt19", "Top2a", "Hmga2", "Gata6")))
# marker's from the paper
Ep <- c("EPCAM","ZBED6", "VGLL1", "TRIM54", "PIFO", "MSMB", "KRT6A", "FXYD2", "CDK1")
Fb <- c("COL1A1" , "VIT", "STRA6", "SFRP1", "MSLN", "LRRC15", "COL9A1", "CDK1")
VlnPlot(seurat_object, features = c(Fb, "PLVAP"))
VlnPlot(seurat_object, features = Ep)
FeaturePlot(seurat_object, features = c(Fb, "PLVAP"))
FeaturePlot(seurat_object, features = Ep)
# rename identified clusters
seurat_object <- RenameIdents(object = seurat_object, `1` = "Fibroblasts_1")
seurat_object <- RenameIdents(object = seurat_object, `2` = "Fibroblasts_2")
seurat_object <- RenameIdents(object = seurat_object, `4` = "Endothelial")
seurat_object <- RenameIdents(object = seurat_object, `5` = "Stellate")
seurat_object <- RenameIdents(object = seurat_object, `6` = "Fibroblasts_3")
seurat_object <- RenameIdents(object = seurat_object, `7` = "Fibroblasts_4")
seurat_object <- RenameIdents(object = seurat_object, `12` = "Fibroblasts_5")
seurat_object <- RenameIdents(object = seurat_object, `13` = "Fibroblasts_6")
seurat_object <- RenameIdents(object = seurat_object, `3` = "Epithelial_1")
seurat_object <- RenameIdents(object = seurat_object, `0` = "Epithelial_2")
seurat_object <- RenameIdents(object = seurat_object, `9` = "Epithelial_3")
seurat_object <- RenameIdents(object = seurat_object, `8` = "Epithelial_4")
seurat_object <- RenameIdents(object = seurat_object, `15` = "Epithelial_5")
seurat_object <- RenameIdents(object = seurat_object, `10` = "Premalignant")
DimPlot(seurat_object, reduction = "umap", label = T)
seurat_object@meta.data$CellType_cluster <- Idents(seurat_object)
seurat_object@meta.data$CellType <- gsub('\\_.*','',seurat_object@meta.data$CellType_cluster)
unique(seurat_object@meta.data$CellType_cluster)
## [1] Epithelial_2 Endothelial Fibroblasts_1 Fibroblasts_2 Epithelial_3
## [6] Premalignant Epithelial_1 Fibroblasts_3 Stellate 11
## [11] Epithelial_4 Fibroblasts_4 Fibroblasts_5 14 Fibroblasts_6
## [16] Epithelial_5
## 16 Levels: Premalignant Epithelial_5 Epithelial_4 Epithelial_3 ... 14
unique(seurat_object@meta.data$CellType)
## [1] "Epithelial" "Endothelial" "Fibroblasts" "Premalignant" "Stellate"
## [6] "11" "14"
seurat_object
## An object of class Seurat
## 24192 features across 29180 samples within 1 assay
## Active assay: RNA (24192 features, 2000 variable features)
## 11 layers present: counts.GSM5831620_5_GEX_4, counts.GSM5831621_5_GEX_5, counts.GSM5831622_5_GEX_6, counts.GSM5831623_5_GEX_9, counts.GSM5831624_GEX_45_MM, data.GSM5831620_5_GEX_4, data.GSM5831621_5_GEX_5, data.GSM5831622_5_GEX_6, data.GSM5831623_5_GEX_9, data.GSM5831624_GEX_45_MM, scale.data
## 3 dimensional reductions calculated: pca, umap, integrated.cca
head(seurat_object)
| orig.ident | nCount_RNA | nFeature_RNA | cell_id | percent.mt | is.doublet | RNA_snn_res.0.4 | seurat_clusters | RNA_snn_res.0.3 | CellType_cluster | CellType | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| GSM5831620_5_GEX_4_AAACCTGAGCAACGGT-1 | GSM5831620_5_GEX_4 | 15275 | 3787 | GSM5831620_5_GEX_4_AAACCTGAGCAACGGT-1 | 1.8788871 | singlet | 8 | 0 | 0 | Epithelial_2 | Epithelial |
| GSM5831620_5_GEX_4_AAACCTGAGCACCGTC-1 | GSM5831620_5_GEX_4 | 2462 | 1522 | GSM5831620_5_GEX_4_AAACCTGAGCACCGTC-1 | 3.8180341 | singlet | 3 | 4 | 4 | Endothelial | Endothelial |
| GSM5831620_5_GEX_4_AAACCTGAGTCAAGGC-1 | GSM5831620_5_GEX_4 | 2170 | 985 | GSM5831620_5_GEX_4_AAACCTGAGTCAAGGC-1 | 1.7972350 | singlet | 10 | 1 | 1 | Fibroblasts_1 | Fibroblasts |
| GSM5831620_5_GEX_4_AAACCTGAGTCTCGGC-1 | GSM5831620_5_GEX_4 | 2757 | 1239 | GSM5831620_5_GEX_4_AAACCTGAGTCTCGGC-1 | 7.1091766 | singlet | 2 | 2 | 2 | Fibroblasts_2 | Fibroblasts |
| GSM5831620_5_GEX_4_AAACCTGCAAAGAATC-1 | GSM5831620_5_GEX_4 | 17419 | 4277 | GSM5831620_5_GEX_4_AAACCTGCAAAGAATC-1 | 0.7922384 | singlet | 1 | 1 | 1 | Fibroblasts_1 | Fibroblasts |
| GSM5831620_5_GEX_4_AAACCTGCAAGAGTCG-1 | GSM5831620_5_GEX_4 | 11342 | 3304 | GSM5831620_5_GEX_4_AAACCTGCAAGAGTCG-1 | 0.8993123 | singlet | 3 | 4 | 4 | Endothelial | Endothelial |
| GSM5831620_5_GEX_4_AAACCTGCAATCTGCA-1 | GSM5831620_5_GEX_4 | 9774 | 2783 | GSM5831620_5_GEX_4_AAACCTGCAATCTGCA-1 | 2.0360139 | singlet | 2 | 2 | 2 | Fibroblasts_2 | Fibroblasts |
| GSM5831620_5_GEX_4_AAACCTGCAATTCCTT-1 | GSM5831620_5_GEX_4 | 21284 | 4344 | GSM5831620_5_GEX_4_AAACCTGCAATTCCTT-1 | 0.0845706 | singlet | 2 | 2 | 2 | Fibroblasts_2 | Fibroblasts |
| GSM5831620_5_GEX_4_AAACCTGCATAGACTC-1 | GSM5831620_5_GEX_4 | 4595 | 1577 | GSM5831620_5_GEX_4_AAACCTGCATAGACTC-1 | 9.4885745 | singlet | 2 | 2 | 2 | Fibroblasts_2 | Fibroblasts |
| GSM5831620_5_GEX_4_AAACCTGCATCTCGCT-1 | GSM5831620_5_GEX_4 | 2226 | 1253 | GSM5831620_5_GEX_4_AAACCTGCATCTCGCT-1 | 0.7187781 | singlet | 8 | 0 | 0 | Epithelial_2 | Epithelial |
# save for easier import
saveRDS(seurat_object, file = "./output/GSE194247_preprocessed.rds")
seurat_object <- readRDS("./output/GSE194247_preprocessed.rds")
head(seurat_object)
| orig.ident | nCount_RNA | nFeature_RNA | cell_id | percent.mt | is.doublet | RNA_snn_res.0.4 | seurat_clusters | RNA_snn_res.0.3 | CellType_cluster | CellType | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| GSM5831620_5_GEX_4_AAACCTGAGCAACGGT-1 | GSM5831620_5_GEX_4 | 15275 | 3787 | GSM5831620_5_GEX_4_AAACCTGAGCAACGGT-1 | 1.8788871 | singlet | 8 | 0 | 0 | Epithelial_2 | Epithelial |
| GSM5831620_5_GEX_4_AAACCTGAGCACCGTC-1 | GSM5831620_5_GEX_4 | 2462 | 1522 | GSM5831620_5_GEX_4_AAACCTGAGCACCGTC-1 | 3.8180341 | singlet | 3 | 4 | 4 | Endothelial | Endothelial |
| GSM5831620_5_GEX_4_AAACCTGAGTCAAGGC-1 | GSM5831620_5_GEX_4 | 2170 | 985 | GSM5831620_5_GEX_4_AAACCTGAGTCAAGGC-1 | 1.7972350 | singlet | 10 | 1 | 1 | Fibroblasts_1 | Fibroblasts |
| GSM5831620_5_GEX_4_AAACCTGAGTCTCGGC-1 | GSM5831620_5_GEX_4 | 2757 | 1239 | GSM5831620_5_GEX_4_AAACCTGAGTCTCGGC-1 | 7.1091766 | singlet | 2 | 2 | 2 | Fibroblasts_2 | Fibroblasts |
| GSM5831620_5_GEX_4_AAACCTGCAAAGAATC-1 | GSM5831620_5_GEX_4 | 17419 | 4277 | GSM5831620_5_GEX_4_AAACCTGCAAAGAATC-1 | 0.7922384 | singlet | 1 | 1 | 1 | Fibroblasts_1 | Fibroblasts |
| GSM5831620_5_GEX_4_AAACCTGCAAGAGTCG-1 | GSM5831620_5_GEX_4 | 11342 | 3304 | GSM5831620_5_GEX_4_AAACCTGCAAGAGTCG-1 | 0.8993123 | singlet | 3 | 4 | 4 | Endothelial | Endothelial |
| GSM5831620_5_GEX_4_AAACCTGCAATCTGCA-1 | GSM5831620_5_GEX_4 | 9774 | 2783 | GSM5831620_5_GEX_4_AAACCTGCAATCTGCA-1 | 2.0360139 | singlet | 2 | 2 | 2 | Fibroblasts_2 | Fibroblasts |
| GSM5831620_5_GEX_4_AAACCTGCAATTCCTT-1 | GSM5831620_5_GEX_4 | 21284 | 4344 | GSM5831620_5_GEX_4_AAACCTGCAATTCCTT-1 | 0.0845706 | singlet | 2 | 2 | 2 | Fibroblasts_2 | Fibroblasts |
| GSM5831620_5_GEX_4_AAACCTGCATAGACTC-1 | GSM5831620_5_GEX_4 | 4595 | 1577 | GSM5831620_5_GEX_4_AAACCTGCATAGACTC-1 | 9.4885745 | singlet | 2 | 2 | 2 | Fibroblasts_2 | Fibroblasts |
| GSM5831620_5_GEX_4_AAACCTGCATCTCGCT-1 | GSM5831620_5_GEX_4 | 2226 | 1253 | GSM5831620_5_GEX_4_AAACCTGCATCTCGCT-1 | 0.7187781 | singlet | 8 | 0 | 0 | Epithelial_2 | Epithelial |
# subset fibroblasts
Idents(seurat_object) <- "CellType"
# join layers
seurat_object <- JoinLayers(seurat_object)
fibroblasts <- subset(seurat_object, idents = c("Fibroblasts"))
# save for easier import
saveRDS(fibroblasts, file = "./output/GSE194247_fibroblasts.rds")
sessionInfo()
## R version 4.4.1 (2024-06-14)
## Platform: aarch64-apple-darwin20
## Running under: macOS 15.4.1
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: Europe/Stockholm
## tzcode source: internal
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] scDblFinder_1.18.0 SingleCellExperiment_1.26.0
## [3] SummarizedExperiment_1.34.0 Biobase_2.64.0
## [5] GenomicRanges_1.56.2 GenomeInfoDb_1.40.1
## [7] IRanges_2.38.1 S4Vectors_0.42.1
## [9] BiocGenerics_0.50.0 MatrixGenerics_1.16.0
## [11] matrixStats_1.5.0 lubridate_1.9.3
## [13] forcats_1.0.0 stringr_1.5.1
## [15] purrr_1.0.4 readr_2.1.5
## [17] tidyr_1.3.1 tibble_3.2.1
## [19] ggplot2_3.5.1 tidyverse_2.0.0
## [21] dplyr_1.1.4 Seurat_5.2.1
## [23] SeuratObject_5.0.2 sp_2.2-0
##
## loaded via a namespace (and not attached):
## [1] RcppAnnoy_0.0.22 splines_4.4.1
## [3] later_1.4.1 BiocIO_1.14.0
## [5] bitops_1.0-9 polyclip_1.10-7
## [7] XML_3.99-0.17 fastDummies_1.7.5
## [9] lifecycle_1.0.4 edgeR_4.2.2
## [11] globals_0.16.3 lattice_0.22-6
## [13] MASS_7.3-61 magrittr_2.0.3
## [15] limma_3.60.6 plotly_4.10.4
## [17] sass_0.4.9 rmarkdown_2.29
## [19] jquerylib_0.1.4 yaml_2.3.10
## [21] metapod_1.12.0 httpuv_1.6.15
## [23] sctransform_0.4.1 spam_2.11-1
## [25] spatstat.sparse_3.1-0 reticulate_1.40.0
## [27] cowplot_1.1.3 pbapply_1.7-2
## [29] RColorBrewer_1.1-3 abind_1.4-8
## [31] zlibbioc_1.50.0 Rtsne_0.17
## [33] RCurl_1.98-1.16 GenomeInfoDbData_1.2.12
## [35] ggrepel_0.9.6 irlba_2.3.5.1
## [37] listenv_0.9.1 spatstat.utils_3.1-2
## [39] goftest_1.2-3 RSpectra_0.16-2
## [41] dqrng_0.4.1 spatstat.random_3.3-2
## [43] fitdistrplus_1.2-2 parallelly_1.42.0
## [45] DelayedMatrixStats_1.26.0 codetools_0.2-20
## [47] DelayedArray_0.30.1 scuttle_1.14.0
## [49] tidyselect_1.2.1 UCSC.utils_1.0.0
## [51] farver_2.1.2 viridis_0.6.5
## [53] ScaledMatrix_1.12.0 spatstat.explore_3.3-4
## [55] GenomicAlignments_1.40.0 jsonlite_1.9.0
## [57] BiocNeighbors_1.22.0 progressr_0.15.1
## [59] scater_1.32.1 ggridges_0.5.6
## [61] survival_3.7-0 tools_4.4.1
## [63] ica_1.0-3 Rcpp_1.0.14
## [65] glue_1.8.0 gridExtra_2.3
## [67] SparseArray_1.4.8 xfun_0.51
## [69] withr_3.0.2 fastmap_1.2.0
## [71] bluster_1.14.0 digest_0.6.37
## [73] rsvd_1.0.5 timechange_0.3.0
## [75] R6_2.6.1 mime_0.12
## [77] colorspace_2.1-1 scattermore_1.2
## [79] tensor_1.5 spatstat.data_3.1-4
## [81] generics_0.1.3 data.table_1.16.4
## [83] rtracklayer_1.64.0 httr_1.4.7
## [85] htmlwidgets_1.6.4 S4Arrays_1.4.1
## [87] uwot_0.2.2 pkgconfig_2.0.3
## [89] gtable_0.3.6 lmtest_0.9-40
## [91] XVector_0.44.0 htmltools_0.5.8.1
## [93] dotCall64_1.2 scales_1.3.0
## [95] png_0.1-8 spatstat.univar_3.1-1
## [97] scran_1.32.0 knitr_1.49
## [99] rstudioapi_0.17.1 tzdb_0.4.0
## [101] reshape2_1.4.4 rjson_0.2.23
## [103] nlme_3.1-166 curl_6.2.1
## [105] cachem_1.1.0 zoo_1.8-12
## [107] KernSmooth_2.23-24 vipor_0.4.7
## [109] parallel_4.4.1 miniUI_0.1.1.1
## [111] ggrastr_1.0.2 restfulr_0.0.15
## [113] pillar_1.10.1 grid_4.4.1
## [115] vctrs_0.6.5 RANN_2.6.2
## [117] promises_1.3.2 BiocSingular_1.20.0
## [119] beachmat_2.20.0 xtable_1.8-4
## [121] cluster_2.1.6 beeswarm_0.4.0
## [123] evaluate_1.0.3 locfit_1.5-9.11
## [125] cli_3.6.4 compiler_4.4.1
## [127] Rsamtools_2.20.0 rlang_1.1.5
## [129] crayon_1.5.3 future.apply_1.11.3
## [131] labeling_0.4.3 ggbeeswarm_0.7.2
## [133] plyr_1.8.9 stringi_1.8.4
## [135] viridisLite_0.4.2 deldir_2.0-4
## [137] BiocParallel_1.38.0 munsell_0.5.1
## [139] Biostrings_2.72.1 lazyeval_0.2.2
## [141] spatstat.geom_3.3-5 Matrix_1.7-1
## [143] RcppHNSW_0.6.0 hms_1.1.3
## [145] patchwork_1.3.0 sparseMatrixStats_1.16.0
## [147] future_1.34.0 statmod_1.5.0
## [149] shiny_1.10.0 ROCR_1.0-11
## [151] igraph_2.1.4 bslib_0.9.0
## [153] xgboost_1.7.8.1